import requests
from lxml import etree
from time import sleep

# Channel landing page; it serves the first batch of article links and is
# also sent as the Referer for the paginated requests further down.
start_url = "https://www.thepaper.cn/channel_25951"

# Browser-like headers sent with the first request.
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
    "Referer": "https://www.thepaper.cn/"
}

# requests never times out by default, so without an explicit timeout a
# stalled connection would hang the script forever.
respons = requests.get(url=start_url, headers=headers, timeout=10)

# bytes.decode() defaults to UTF-8.
html = etree.HTML(respons.content.decode())
# Collect the href of every thumbnail link inside a "news_li" entry.
news_urls_list = html.xpath("//div[@class='news_li']/div[@class='news_tu']/a/@href")
print(news_urls_list)
print("="*30)

# The loop below increments before requesting, so the first paginated
# request asks for pageidx=2.
page = 1

# Everything except the trailing page index is constant (query string kept
# verbatim), so build the prefix and the headers once instead of on every
# iteration.
next_url_prefix = "https://www.thepaper.cn/load_index.jsp?nodeids=25434,25436,25433,25438,25435,25437,27234,25485,25432,&topCids=2170161,2170099,2169118&pageidx="
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36",
    "Referer": start_url
}

# Walk the paginated endpoint until a page yields no article links.
while True:
    page += 1
    next_url = next_url_prefix + str(page)
    print(next_url)
    print("="*30)
    # Explicit timeout: requests would otherwise wait indefinitely on a
    # stalled connection.
    respons = requests.get(url=next_url, headers=headers, timeout=10)
    body = respons.content.decode()
    # An empty body cannot be parsed into a tree (lxml returns None or
    # raises, depending on version), so skip parsing and treat it like a
    # page without links instead of crashing on html.xpath.
    html = etree.HTML(body) if body.strip() else None
    if html is None:
        news_urls_list = []
    else:
        news_urls_list = html.xpath("//div[@class='news_li']/div[@class='news_tu']/a/@href")
    if not news_urls_list:
        # No links found: assume we ran past the last page and stop.
        break
    print("="*30)
    print(news_urls_list)
    # Pause between requests to avoid hammering the server.
    sleep(8)
